from collections import Counter
from math import sqrt
Marital status | Buyer |
---|---|
Divorced | Yes |
Married | No |
Unmarried | Yes |
Unmarried | Yes |
Married | No |
Divorced | Yes |
Divorced | Yes |
Divorced | Yes |
Divorced | Yes |
Unmarried | No |
# Training set: each row is [marital status, buyer label ("Yes" or "No")].
data = [
["Divorced", "Yes"],
["Divorced", "Yes"],
["Married", "No"],
["Unmarried", "Yes"],
["Unmarried", "Yes"],
["Married", "No"],
["Divorced", "Yes"],
["Divorced", "Yes"],
["Divorced", "Yes"],
["Unmarried", "No"]
]
def separate(data):
    """Group the last column of every row by the value in its first column.

    Args:
        data: Iterable of rows (indexable sequences). ``row[0]`` is used as
            the grouping key and ``row[-1]`` as the value to collect.

    Returns:
        A dict mapping each distinct first-column value to the list of
        last-column values seen for it, in input order.
    """
    separated = {}
    for vector in data:
        # setdefault creates the bucket on first sight of a key.
        separated.setdefault(vector[0], []).append(vector[-1])
    return separated
# Group the buyer labels by marital status and show the grouping.
sep_data = separate(data)
print(sep_data)
def cardinality(data):
    """Count the labels collected for each key.

    Args:
        data: Dict mapping a key to a list of labels.

    Returns:
        A dict with the same keys; each value is a three-element list
        ``[total, yes_count, no_count]`` where ``total`` is the number of
        labels for the key and the other two are the occurrences of the
        exact strings ``"Yes"`` and ``"No"``.
    """
    return {
        key: [len(labels), labels.count("Yes"), labels.count("No")]
        for key, labels in data.items()
    }
# Per marital status: [number of rows, "Yes" count, "No" count].
car_data = cardinality(sep_data)
print(car_data)
Each value in the result is a list of three counts for that marital status:
index 0 = |Yes| + |No| (all rows for that status), index 1 = |Yes|, index 2 = |No|.
def counter(data):
    """Sum the per-key "Yes" and "No" counts into overall totals.

    Args:
        data: Dict whose values are sequences with the "Yes" count at
            index 1 and the "No" count at index 2.

    Returns:
        A two-element list ``[yes_total, no_total]``; ``[0, 0]`` for an
        empty dict.
    """
    yes_total = sum(counts[1] for counts in data.values())
    no_total = sum(counts[2] for counts in data.values())
    return [yes_total, no_total]
# Overall label totals across every marital status: ["Yes" total, "No" total].
count = counter(car_data)
print(count)
The result is a two-element list: index 0 = |Yes| and index 1 = |No|, each summed over all marital statuses.
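Bayes' theorem: $P(\text{class} \mid \text{data}) = \frac{P(\text{data} \mid \text{class})\, P(\text{class})}{P(\text{data})}$

The denominator $P(\text{data})$ is the same for every class, so comparing the numerators is enough to pick the more probable class.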
def calc(data, class_value):
    """Classify ``class_value`` as "Yes" or "No" with a one-feature naive Bayes.

    For each label the score is ``P(class_value | label) * P(label)``, i.e.
    the numerator of Bayes' theorem. The scores are not divided by
    ``P(class_value)``, so they are comparable with each other but do not
    sum to 1.

    Args:
        data: Rows accepted by ``separate``; first column is the feature
            value, last column is the label ("Yes" or "No").
        class_value: Feature value to classify.

    Returns:
        ``[answer, p0, p1]`` where ``p0`` is the score for "Yes", ``p1`` the
        score for "No", and ``answer`` is "Yes" only when ``p0 > p1``
        (a tie yields "No").

    Raises:
        KeyError: If ``class_value`` does not occur in the first column of
            ``data``.
    """
    car_data = cardinality(separate(data))
    yes_total, no_total = counter(car_data)
    total = yes_total + no_total
    _, yes_count, no_count = car_data[class_value]

    # A label that never occurs contributes a zero score instead of a
    # division by zero; a non-zero label total also guarantees total > 0.
    p0 = yes_count / yes_total * yes_total / total if yes_total else 0.0
    p1 = no_count / no_total * no_total / total if no_total else 0.0

    if p0 > p1:
        return ["Yes", p0, p1]
    return ["No", p0, p1]
# Classify an "Unmarried" customer. The returned list is neither stored nor
# printed here, so it is only visible in an interactive session.
calc(data, "Unmarried")
Result format: `['Answer', probability_for_yes, probability_for_no]`, where `'Answer'` is `"Yes"` or `"No"`.